# Coppock, Alexander, Alan S. Gerber, Donald P. Green, and Holger L. Kern. 
# Combining Double Sampling and Bounds to Address Non-Ignorable Missing Outcomes in Randomized Experiments. 
# Political Analysis (in press)
# Main text analysis. Tables 1, 2, and 3.  Figure 2.

# Clear every visible object from the global environment before running.
# Note: this removes objects only -- attached packages and options() persist --
# so a fresh R session remains the most reliable way to replicate the results.
rm(list = ls())

# Set your working directory to the replication archive
# setwd() 

# Read the cleaned data set; the path is relative to the working directory.
levendusky_mturk_clean <- read.csv(file = "levendusky_mturk_clean.csv")

library(ggplot2)
library(xtable)
library(dplyr)


# Uncomment to install latest version
# install.packages("devtools")
# devtools::install_github("acoppock/attrition")

# OR

# Uncomment to install source
# install.packages("attrition_0.0.0.9000.tar.gz", repos = NULL, type="source")

library(attrition)

# Make Table 1 ------------------------------------------------------------

# Rows 1 and 2: one row per level of Z_lev, placebo units excluded.
# R1, Attempt and R2 are summed directly, so they are presumably 0/1
# indicators -- confirm against the codebook.
table_1_rows_1and2 <- levendusky_mturk_clean %>%
  filter(Z_Levendusky != "placebo") %>%
  group_by(Z_lev) %>%
  summarise(
    wave_1_total              = n(),
    wave_2_responded          = sum(R1),
    wave_2_didntrespond       = sum(1 - R1),
    wave_2_reattempted        = sum(Attempt),
    wave_2_respondedtoattempt = sum(R2)
  )

# Row 3: the same quantities pooled over all non-placebo units, labelled
# "Total" in the Z_lev column so it can be stacked under rows 1 and 2.
table_1_row_3 <- levendusky_mturk_clean %>%
  filter(Z_Levendusky != "placebo") %>%
  summarise(
    Z_lev                     = "Total",
    wave_1_total              = n(),
    wave_2_responded          = sum(R1),
    wave_2_didntrespond       = sum(1 - R1),
    wave_2_reattempted        = sum(Attempt),
    wave_2_respondedtoattempt = sum(R2)
  )

# Stack the per-group rows on top of the pooled row and print the table.
table_1 <- rbind(table_1_rows_1and2, table_1_row_3)
table_1

# Make Table 2 ------------------------------------------------------------

# For each level of Z_lev (placebo excluded), summarise the outcome L_dif_w2
# separately among rows with R1 == 1 (IR_* columns) and rows with R2 == 1
# (DS_* columns): mean, standard deviation, and number of rows selected.
table_2 <- levendusky_mturk_clean %>%
  filter(Z_Levendusky != "placebo") %>%
  group_by(Z_lev) %>%
  summarise(
    IR_mean = mean(L_dif_w2[R1 == 1]),
    IR_sd   = sd(L_dif_w2[R1 == 1]),
    IR_N    = length(L_dif_w2[R1 == 1]),
    DS_mean = mean(L_dif_w2[R2 == 1]),
    DS_sd   = sd(L_dif_w2[R2 == 1]),
    DS_N    = length(L_dif_w2[R2 == 1])
  )
table_2

# Make Table 3 -------------------------------------------------------


# All three estimators below use the rows with a non-missing Z1 and an
# outcome scale bounded by minY = 0 and maxY = 6, with alpha = .05.

# Extreme value bounds in absence of double sampling
cis_1 <- estimator_ev(
  Y = L_dif_w2,
  Z = Z1,
  R = R1,
  minY = 0,
  maxY = 6,
  alpha = .05,
  data = subset(levendusky_mturk_clean, !is.na(Z1))
)

# Extreme value bounds with double sampling
cis_2 <- estimator_ds(
  Y = L_dif_w2,
  Z = Z1,
  R1 = R1,
  Attempt = Attempt,
  R2 = R2,
  minY = 0,
  maxY = 6,
  alpha = .05,
  data = subset(levendusky_mturk_clean, !is.na(Z1))
)

# Extreme value bounds with double sampling and post-stratification
# (strata given by pid_3_recoded)
cis_3 <- estimator_ds(
  Y = L_dif_w2,
  Z = Z1,
  R1 = R1,
  Attempt = Attempt,
  R2 = R2,
  strata = pid_3_recoded,
  minY = 0,
  maxY = 6,
  alpha = .05,
  data = subset(levendusky_mturk_clean, !is.na(Z1))
)

# One column per estimator, in the order: no double sampling, double
# sampling, double sampling + post-stratification.
table_3 <- cbind(
  no_ds = cis_1,
  ds    = cis_2,
  ds_ps = cis_3
)

# Label the six quantities reported for each estimator.
row.names(table_3) <- c(
  "95% CI Lower Bound",
  "95% CI Upper Bound",
  "Worst-Case Bound: Low Estimate",
  "Worst-Case Bound: High Estimate",
  "Variance of Low Estimate",
  "Variance of High Estimate"
)

table_3


# Claim in section 4.2 of a "65 percent reduction in confidence interval width."
# Width of each 95% confidence interval: row 2 of table_3 (95% CI upper bound)
# minus row 1 (95% CI lower bound), giving one width per estimator column.
widths <- table_3[2, ] - table_3[1, ]
# Proportional reduction in width going from column 1 (no_ds) to column 3
# (ds_ps).
(widths[1] - widths[3]) / widths[1]



# Sensitivity ------------------------------------------------------------

# Sensitivity analysis for the double-sampling estimator on the rows with a
# non-missing Z1, using the same 0-6 outcome bounds as Table 3 but with
# alpha = .10.
sens_test_90 <- sensitivity_ds(
  Y = L_dif_w2, Z = Z1,
  R1 = R1, Attempt = Attempt, R2 = R2,
  minY = 0, maxY = 6,
  alpha = .10,
  data = subset(levendusky_mturk_clean, !is.na(Z1))
)

# Figure 2: display the plot stored in the result's `sensitivity_plot` element.
sens_test_90$sensitivity_plot


